# Assemble the download URL from the repository root and the CSV file name.
location <- "https://github.com/cqregan/uw_projects/raw/main/"
file <- "Use_Of_Force.csv"
link <- paste0(location, file)
link
## [1] "https://github.com/cqregan/uw_projects/raw/main/Use_Of_Force.csv"
# Read the remote CSV through a URL connection into a data frame, then show it.
df <- read.csv(file = url(link))
df
# Column names of the raw table.
names(df)
##  [1] "ID"                "Incident_Num"      "Incident_Type"    
##  [4] "Occured_date_time" "Precinct"          "Sector"           
##  [7] "Beat"              "Officer_ID"        "Subject_ID"       
## [10] "Subject_Race"      "Subject_Gender"
# Compact per-column summary; each output line is cut at 70 characters.
str(df, width = 70, strict.width = "cut")
## 'data.frame':    15120 obs. of  11 variables:
##  $ ID               : chr  "2015UOF-1456-1964-5409" "2017UOF-0168-1"..
##  $ Incident_Num     : int  6509 14471 5372 40328 18450 1168 44273 14..
##  $ Incident_Type    : chr  "Level 1 - Use of Force" "Level 1 - Use "..
##  $ Occured_date_time: chr  "08/13/2015 01:57:00 AM" "02/13/2017 09:"..
##  $ Precinct         : chr  "North" "North" "East" "South" ...
##  $ Sector           : chr  "NORA" "JOHN" "EDWARD" "OCEAN" ...
##  $ Beat             : chr  "N3" "J2" "E2" "O1" ...
##  $ Officer_ID       : int  1871 1238 1844 1576 1665 872 1776 1709 10..
##  $ Subject_ID       : int  5370 11804 4495 19125 14495 1127 20044 12..
##  $ Subject_Race     : chr  "Black or African American" "Black or Af"..
##  $ Subject_Gender   : chr  "Female" "Male" "Female" "Female" ...
# First 20 precinct labels; the sample already contains placeholder codes
# such as "X" and "-".
head(df[["Precinct"]], n = 20)
##  [1] "North"     "North"     "East"      "South"     "North"     "Southwest"
##  [7] "North"     "South"     "North"     "X"         "-"         "East"     
## [13] "East"      "North"     "East"      "North"     "West"      "West"     
## [19] "-"         "South"
# Count incidents per precinct. Passing an `exclude` value that does not
# contain NA replaces table()'s default exclusion set, so missing values
# (if there are any) are tallied instead of being dropped.
absoluteT <- table(df[["Precinct"]], exclude = "nothing")
absoluteT
## 
##         -         0      East     North       OOJ     South Southwest      West 
##       583        11      2962      3419        58      2532       857      4327 
##         X 
##       371
# The same counts expressed as fractions of the total.
prop.table(absoluteT)
## 
##            -            0         East        North          OOJ        South 
## 0.0385582011 0.0007275132 0.1958994709 0.2261243386 0.0038359788 0.1674603175 
##    Southwest         West            X 
## 0.0566798942 0.2861772487 0.0245370370
# Keep the shares as percentages for later use.
propT <- prop.table(absoluteT) * 100

propT
## 
##           -           0        East       North         OOJ       South 
##  3.85582011  0.07275132 19.58994709 22.61243386  0.38359788 16.74603175 
##   Southwest        West           X 
##  5.66798942 28.61772487  2.45370370
# Recode the placeholder precinct labels ("-", "0" and "X") as "Unknown" in a
# copy of the data, leaving the raw `df` untouched. The recode is applied to the
# rows themselves, not to the names of the frequency table: relabelling table
# cells would only produce three separate cells sharing one label, without
# adding their counts together.
dfCleaned <- df
# %in% never yields NA, so rows with a missing precinct are simply left alone.
dfCleaned[["Precinct"]][dfCleaned[["Precinct"]] %in% c("-", "0", "X")] <- "Unknown"
dfCleaned

# Rebuild the frequency table from the cleaned column; the three placeholder
# groups now appear as a single "Unknown" cell.
absoluteT <- table(dfCleaned[["Precinct"]], exclude = "nothing")
absoluteT
prop.table(absoluteT)
## 
##        East       North         OOJ       South   Southwest     Unknown 
## 0.195899471 0.226124339 0.003835979 0.167460317 0.056679894 0.063822751 
##        West 
## 0.286177249
propT <- prop.table(absoluteT) * 100
propT
## 
##       East      North        OOJ      South  Southwest    Unknown       West 
## 19.5899471 22.6124339  0.3835979 16.7460317  5.6679894  6.3822751 28.6177249
# Turn the table into a data frame with one row per precinct, then attach the
# percentages as a plain numeric column.
(tableFreq <- as.data.frame(absoluteT))
names(tableFreq) <- c("Precinct", "Count")
tableFreq$Percent <- as.vector(propT)
tableFreq
library(ggplot2)
# Bar chart of incident counts per precinct, drawn from the summary table.
base <- ggplot(
  data = tableFreq,
  mapping = aes(x = Precinct, y = Count)
)
# stat = "identity" makes each bar as tall as the value stored in Count.
plot1 <- base + geom_bar(stat = "identity", fill = "gray")
plot1

titleText <- "How frequently do police precincts in Seattle use force?"
sourceText <- "Source: City of Seattle"

# Add the title and the source caption; both axis titles are removed.
plot2 <- plot1 +
  labs(
    title = titleText,
    x = NULL,
    y = NULL,
    caption = sourceText
  )
plot2

# Sort the summary rows by ascending share and show the result.
tableFreq <- tableFreq[order(tableFreq$Percent), ]
tableFreq
# Precinct labels in ascending order of share; used below to order the x axis.
(PrecinctOrd <- tableFreq[order(tableFreq$Percent), "Precinct"])
## [1] OOJ       Southwest Unknown   South     East      North     West     
## Levels: East North OOJ South Southwest Unknown West
# Reprint the chart built earlier, before the reordered version is drawn.
plot2

# Pin the x-axis order to PrecinctOrd and switch to the classic theme.
base <- base +
  scale_x_discrete(limits = PrecinctOrd) +
  theme_classic()
##
# Rebuild the bars and the labels on top of the updated base.
plot1 <- base + geom_bar(stat = "identity", fill = "gray")
plot2 <- plot1 +
  labs(
    title = titleText,
    x = NULL,
    y = NULL,
    caption = sourceText
  )

plot2


# Re-inspect the raw column types, this time cutting each line at 50 characters.
# Occured_date_time is still stored as character text at this point.
str(df, width = 50, strict.width = "cut")
## 'data.frame':    15120 obs. of  11 variables:
##  $ ID               : chr  "2015UOF-1456-1964-5"..
##  $ Incident_Num     : int  6509 14471 5372 40328..
##  $ Incident_Type    : chr  "Level 1 - Use of Fo"..
##  $ Occured_date_time: chr  "08/13/2015 01:57:00"..
##  $ Precinct         : chr  "North" "North" "Eas"..
##  $ Sector           : chr  "NORA" "JOHN" "EDWAR"..
##  $ Beat             : chr  "N3" "J2" "E2" "O1" ...
##  $ Officer_ID       : int  1871 1238 1844 1576 1..
##  $ Subject_ID       : int  5370 11804 4495 19125..
##  $ Subject_Race     : chr  "Black or African Am"..
##  $ Subject_Gender   : chr  "Female" "Male" "Fem"..
# Convert the incident timestamp text into Date values. The strings look like
# "08/13/2015 01:57:00 AM", i.e. the year has four digits, so the format needs
# %Y. With the two-digit spec %y only "20" is consumed as the year and every
# row would be parsed into 2020. The time-of-day part after the date is ignored.
dfCleaned[["Occured_date_time"]] <- as.Date(
  dfCleaned[["Occured_date_time"]],
  format = "%m/%d/%Y"
)
dfCleaned
# Histogram of incident dates, with bars filled by precinct and outlined in
# black. No bin count is given, so ggplot2 reports the default it picked.
baseHY <- ggplot(data = dfCleaned, aes(x = Occured_date_time))
histHY <- baseHY + geom_histogram(aes(fill = Precinct), color = "black")
histHY
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.